Load the global variables
here::i_am("scripts/2.2_multiome_mouse_mammary_gland.Rmd")
mainDir = here::here()
source(knitr::purl(file.path(mainDir,"scripts/global_variables.Rmd"), quiet=TRUE))
source(knitr::purl(file.path(mainDir, "scripts/functions.Rmd"), quiet=TRUE))
set.seed(123)
inputDir_local = file.path(inputDir, "mm10", "mouse_mammary_gland", "one_cell_multiome")
outputDir_local = file.path(outputDir, "2.2_multiome_mouse_mammary_gland") ; if(!file.exists(outputDir_local)){dir.create(outputDir_local)}
outputDir_objects = file.path(outputDir_local, "objects") ; if(!file.exists(outputDir_objects)){dir.create(outputDir_objects)}
outputDir_plots = file.path(outputDir_local, "plots") ; if(!file.exists(outputDir_plots)){dir.create(outputDir_plots)}
outputDir_tables = file.path(outputDir_local, "tables") ; if(!file.exists(outputDir_tables)){dir.create(outputDir_tables)}
ld <- list.dirs(inputDir_local)
fragpaths <- list.files(ld[grepl("/.*fragmentFiles", ld)], full.names = TRUE)
fragpaths_k4 <- fragpaths[grepl(".*/h3k4me1/.*fragmentFiles/.*tsv.gz$", fragpaths)]
fragpaths_k27 <- fragpaths[grepl(".*/h3k27me3/.*fragmentFiles/.*tsv.gz$", fragpaths)]
fragpaths <- list(fragpaths_k27, fragpaths_k4)
names(fragpaths) <- c("h3k27me3", "h3k4me1")
rnapaths <- ld[grepl("/10XlikeMatrix_umi", ld)]
rm(ld)
consensus_peaks <- list("h3k27me3" = toGRanges(file.path(annotDir, "CREneg_peaks_h3k27me3.bed"), format="BED", header=FALSE),
"h3k4me1" = toGRanges(file.path(annotDir, "CREneg_peaks_h3k4me1.bed"), format="BED", header=FALSE))
ld <- list.dirs(inputDir)
bw_paths <- list.files(ld[grepl(".*/mouse_mammary_gland/bigwig.*", ld)], full.names = TRUE)
names(bw_paths) <- sub(".*D1888_CRE3-Mice8724_", "", bw_paths)
list_rna_bw <- list(rna_all = bw_paths[["all_rna_pseudobulk.bw"]],
rna_luminal = bw_paths[["cluster_1_rna_luminal.bw"]],
rna_basal = bw_paths[["cluster_0_rna_basal.bw"]],
rna_luminal_sc = bw_paths[["T0302_luminal_cell.bw"]],
rna_basal_sc = bw_paths[["T0290_basal_cell.bw"]])
rm(ld)
ld <- list.dirs(inputDir_local)
facs_path <- list.files(ld[grepl("/facs", ld)], full.names = TRUE)
facs_data <- read.csv(facs_path)
rm(ld)
row.names(facs_data) <- facs_data$CB
facs_data <- facs_data[, c(1,2)]
colnames(facs_data) <- c("CD24", "CD49f")
## loading RNA data
rna.data <- Read10X(data.dir = rnapaths)
rna_seurat <- CreateSeuratObject(counts = rna.data,
min.cells = 1,
min.features = -1,
project = "mouse_mammary_gland")
## adding FACS annotation
rna_seurat <- AddMetaData(
object = rna_seurat,
metadata = facs_data
)
## creating the multiome seurat objects
seurat_list <- list()
for (i in names(fragpaths)){
message(paste0("### Making fragment object for ", i))
total_counts <- CountFragments(fragpaths[[i]])
barcodes <- total_counts$CB
frags <- CreateFragmentObject(path = fragpaths[[i]], cells = barcodes)
message(paste0("### Making 50k bin matrix for ", i))
bin50k_kmatrix = GenomeBinMatrix(
frags,
genome = mm10,
cells = NULL,
binsize = 50000,
process_n = 10000,
sep = c(":", "_"),
verbose = TRUE)
message(paste0("### Making peak matrix for ", i))
peak_matrix = FeatureMatrix(
frags,
features = consensus_peaks[[i]],
cells = NULL,
sep = c("-", "-"),
verbose = TRUE)
message(paste0("### Creating chromatin assay for ", i))
chrom_assay <- CreateChromatinAssay(
counts = bin50k_kmatrix,
sep = c(":", "_"),
genome = "mm10",
fragments = fragpaths[[i]],
min.cells = 1,
min.features = -1)
message(paste0("### Creating Seurat object for ", i))
seurat <- CreateSeuratObject(
counts = chrom_assay,
assay = "bin_50k")
message(paste0("### Adding peak assay for ", i))
seurat[["peaks"]] <- CreateChromatinAssay(
counts = peak_matrix, genome = "mm10")
message(paste0("### Adding RNA assay for ", i))
seurat <- AddMetaData(seurat, rna_seurat@meta.data)
seurat[["RNA"]] <- rna_seurat@assays[["RNA"]]
Annotation(seurat) <- annotations_mm10
seurat <- AddMetaData(seurat, CountFragments(fragpaths[[i]]))
seurat <- FRiP(object = seurat, assay = "peaks", total.fragments = "reads_count")
seurat@meta.data[["orig.ident"]] <- i
seurat_list[[i]] <- seurat
rm(seurat)
rm(frags)
rm(total_counts)
rm(barcodes)
rm(fragments_per_cell)
rm(bin50k_matrix)
rm(peak_matrix)
rm(chromatin_assay)
}
saveRDS(seurat_list, file.path(outputDir_objects, "seurat_list.rds"))
## merging h3k4me1 and h3k27me3 data into one object
seurat <- merge(seurat_list[[1]], seurat_list[[2]])
seurat <- JoinLayers(seurat, assay = "RNA")
saveRDS(seurat, file.path(outputDir_objects, "seurat_cre_step1.rds"))
seurat <- readRDS(file.path(outputDir_objects, "seurat_cre_step1.rds"))
seurat_list <- readRDS(file.path(outputDir_objects, "seurat_list.rds"))
facs_data <- facs_data %>%
mutate(phenotype = case_when(
(CD24 > 100000) | (CD24 > 25000 & CD49f < 8900) ~ "luminal",
TRUE ~ "basal"
))
ggplot(facs_data, aes(x = CD49f, y = CD24, color = phenotype)) +
geom_point(size = 3) +
labs(x = "CD49f", y = "CD24", title = "CD24 vs CD49f by Phenotype") +
theme_minimal() +
scale_color_manual(values = c("luminal" = mypal[5], "basal" = mypal[4]))
ggplot(facs_data, aes(x = log(CD49f), y = log(CD24), color = phenotype)) +
geom_point(size = 3) +
labs(x = "log(CD49f)", y = "log(CD24)", title = "CD24 vs CD49f by Phenotype log") +
theme_minimal() +
scale_color_manual(values = c("luminal" = mypal[5], "basal" = mypal[4]))
##the data will appear as phenotype column
seurat <- AddMetaData(
object = seurat,
metadata = facs_data
)
for (s in seurat_list){
p1 <- plot_weighted_hist(s) + ggtitle(print(s@meta.data[["orig.ident"]][1]), " DNA fragments")
p2 <- plot_weighted_hist(s, assay = "RNA") + ggtitle(print(s@meta.data[["orig.ident"]][1]), " RNA reads")
p3 <- plot_weighted_hist(s, assay = "RNA_features") + ggtitle(print(s@meta.data[["orig.ident"]][1]), " RNA features")
print(p1)
print(p2)
print(p3)
rm(s)
}
## [1] "h3k27me3"
## [1] "h3k27me3"
## [1] "h3k27me3"
## [1] "h3k4me1"
## [1] "h3k4me1"
## [1] "h3k4me1"
### Filtering
min_reads_chromatin = 400
min_reads_rna = 1000
seurat@meta.data[["filtering"]] <- ifelse(seurat@meta.data$reads_count > min_reads_chromatin &
seurat@meta.data$nCount_RNA > min_reads_rna,
'pass', 'fail')
p_frip <- VlnPlot(subset(seurat, subset = filtering == 'pass'), c("FRiP"),
group.by = "orig.ident", split.by = NULL, pt.size = 0) +
labs(title = "FRiP CRE mice") +
stat_summary(fun.y = median, geom='point', size = 2, colour = "black") +
theme(legend.position = "none") +
scale_fill_manual(values = c(mypal[3], mypal[2]))
print(p_frip)
p_count <- VlnPlot(subset(seurat, subset = filtering == 'pass'), c("reads_count"),
group.by = "orig.ident", split.by = NULL, pt.size = 0, y.max = 12000) +
labs(title = "Unique fragments per cell CRE") +
stat_summary(fun.y = median, geom='point', size = 2, colour = "black") +
theme(legend.position = "none") +
scale_fill_manual(values = c(mypal[3], mypal[2]))
print(p_count)
p_count_rna <- VlnPlot(subset(seurat, subset = filtering == 'pass'), c("nCount_RNA"),
group.by = "orig.ident", split.by = NULL, pt.size = 0) +
labs(title = "Unique reads per cell RNA CRE") +
stat_summary(fun.y = median, geom='point', size = 2, colour = "black") +
theme(legend.position = "none") +
scale_fill_manual(values = c(mypal[3], mypal[2]))
print(p_count_rna)
p_genes_rna <- VlnPlot(subset(seurat, subset = filtering == 'pass'), c("nFeature_RNA"),
group.by = "orig.ident", split.by = NULL, pt.size = 0) +
labs(title = "Unique genes per cell CRE") +
stat_summary(fun.y = median, geom='point', size = 2, colour = "black") +
theme(legend.position = "none") +
scale_fill_manual(values = c(mypal[3], mypal[2]))
print(p_genes_rna)
ggsave(file.path(outputDir_plots, "frip_mouse_mammary_gland.pdf"),
plot = p_frip,
device = "pdf",
units = "mm",
width = 100,
height = 130)
ggsave(file.path(outputDir_plots, "frip_mouse_mammary_gland.pdf"),
plot = p_count,
device = "pdf",
units = "mm",
width = 100,
height = 130)
ggsave(file.path(outputDir_plots, "frip_mouse_mammary_gland.pdf"),
plot = p_genes_rna,
device = "pdf",
units = "mm",
width = 100,
height = 130)
ggsave(file.path(outputDir_plots, "frip_mouse_mammary_gland.pdf"),
plot = p_count_rna,
device = "pdf",
units = "mm",
width = 100,
height = 130)
seurat <- subset(seurat, subset = filtering == "pass")
frags_to_rna_reads <- ggplot(seurat@meta.data, aes(x = reads_count, y = nCount_RNA, color = orig.ident)) +
geom_point(alpha = 1, stroke = 0, size = 2) +
labs(x = "N unique DNA fragments",
y = "N unique RNA reads",
title = "N unique DNA fragments vs. N unique RNA reads") +
theme_classic() +
scale_color_manual(values = c(mypal[3], mypal[2]))
frags_to_genes <- ggplot(seurat@meta.data, aes(x = reads_count, y = nFeature_RNA, color = orig.ident)) +
geom_point(alpha = 1, stroke = 0, size = 2) +
labs(x = "N unique DNA fragments",
y = "N unique genes",
title = "N unique genes vs. N unique RNA reads") +
theme_classic() +
scale_color_manual(values = c(mypal[3], mypal[2]))
genes_to_rna_reads <- ggplot(seurat@meta.data, aes(x = nFeature_RNA, y = nCount_RNA, color = orig.ident)) +
geom_point(alpha = 1, stroke = 0, size = 2) +
labs(x = "N unique genes",
y = "N unique RNA reads",
title = "N unique genes vs. N unique RNA reads") +
theme_classic() +
scale_color_manual(values = c(mypal[3], mypal[2]))
frags_to_rna_reads
frags_to_genes
genes_to_rna_reads
ggsave(file.path(outputDir_plots, "frags_to_rna_reads.pdf"),
plot = frags_to_rna_reads,
device = "pdf",
units = "mm",
width = 120,
height = 80)
ggsave(file.path(outputDir_plots, "frags_to_genes.pdf"),
plot = frags_to_genes,
device = "pdf",
units = "mm",
width = 120,
height = 80)
ggsave(file.path(outputDir_plots, "genes_to_rna_reads.pdf"),
plot = genes_to_rna_reads,
device = "pdf",
units = "mm",
width = 120,
height = 80)
seurat <- subset(seurat, subset = filtering == 'pass')
DefaultAssay(seurat) <- "RNA"
seurat <- SCTransform(seurat)
seurat <- RunPCA(seurat)
seurat <- RunUMAP(seurat, dims = 1:30, verbose = FALSE, seed.use = 42)
seurat <- FindNeighbors(seurat, dims = 1:30, verbose = FALSE)
seurat <- FindClusters(seurat, resolution = 0.2)
DimPlot(seurat, cols = c(mypal[11], mypal[10]))
saveRDS(seurat, file.path(outputDir_objects, "seurat_cre_processed_step2.rds"))
seurat <- readRDS(file.path(outputDir_objects, "seurat_cre_processed_step2.rds"))
Separating h3k27me3 and h3k4me1 data
cre_list <- list("h3k4me1" = subset(seurat, subset = orig.ident == "h3k4me1"),
"h3k27me3" = subset(seurat, subset = orig.ident == "h3k27me3"))
Running normalization, dim reduction and checking the principal components.
for (mark in names(cre_list)){
DefaultAssay(cre_list[[mark]]) <- 'bin_50k'
cre_list[[mark]] <- RunTFIDF(cre_list[[mark]], assay = 'bin_50k')
cre_list[[mark]] <- FindTopFeatures(cre_list[[mark]], min.cutoff = 'q0')
cre_list[[mark]] <- RunSVD(cre_list[[mark]])
p <- DepthCor(cre_list[[mark]])
print(p)
}
Component 1 should not be used.
for (mark in names(cre_list)){
cre_list[[mark]] <- RunUMAP(object = cre_list[[mark]],
reduction = 'lsi',
dims = 2:30)
cre_list[[mark]] <- FindNeighbors(object = cre_list[[mark]],
reduction = 'lsi',
dims = 2:30)
cre_list[[mark]] <- FindClusters(object = cre_list[[mark]],
algorithm = 3,
resolution = 0.8)
#p <- DimPlot(cre_list[[mark]])
#print(p)
}
saveRDS(cre_list, file.path(outputDir_objects, "seurat_cre_list_by_mark_processed_step2.rds"))
cre_list <- readRDS(file.path(outputDir_objects, "seurat_cre_list_by_mark_processed_step2.rds"))
Quick UMAPs to see sequencing-FACS correspondence
DimPlot(object = seurat, label = TRUE, cols = c(mypal[11], mypal[10])) + labs(title = "RNA clusters")
DimPlot(object = seurat, label = TRUE, cols = c(mypal[4], mypal[5]), group.by = "phenotype") +
labs(title = "RNA clusters, FACS annotation")
DimPlot(object = cre_list[["h3k27me3"]], label = TRUE, cols = c(mypal[11], mypal[10])) + labs(title = "H3K27me3 clusters")
DimPlot(object = cre_list[["h3k27me3"]], label = TRUE, cols = c(mypal[4], mypal[5]), group.by = "phenotype") +
labs(title = "H3K27me3 clusters, FACS annotation")
DimPlot(object = cre_list[["h3k4me1"]], label = TRUE, cols = c(mypal[11], mypal[10])) + labs(title = "H3K4me1 clusters")
DimPlot(object = cre_list[["h3k4me1"]], label = TRUE, cols = c(mypal[4], mypal[5]), group.by = "phenotype") +
labs(title = "H3K4me1 clusters, FACS annotation")
In RNA and K27: cluster 0 = basal-like, cluster 1 = luminal-like. In K4
it is vice versa.
Adding this annotation to meta_data:
tmp_vector <- cre_list[["h3k27me3"]]@meta.data[["seurat_clusters"]]
annot_k27 <- ifelse(tmp_vector == 1, "luminal_like_k27", "basal_like_k27")
cre_list[["h3k27me3"]]@meta.data[["annot_k27"]] <- annot_k27
tmp_vector2 <- cre_list[["h3k27me3"]]@meta.data[["SCT_snn_res.0.2"]]
annot_rna <- ifelse(tmp_vector2 == 1, "luminal_rna", "basal_rna")
cre_list[["h3k27me3"]]@meta.data[["annot_rna"]] <- annot_rna
tmp_vector <- cre_list[["h3k4me1"]]@meta.data[["seurat_clusters"]]
annot_k4 <- ifelse(tmp_vector == 1, "basal_like_k4", "luminal_like_k4")
cre_list[["h3k4me1"]]@meta.data[["annot_k4"]] <- annot_k4
tmp_vector2 <- cre_list[["h3k4me1"]]@meta.data[["SCT_snn_res.0.2"]]
annot_rna <- ifelse(tmp_vector2 == 1, "luminal_rna", "basal_rna")
cre_list[["h3k4me1"]]@meta.data[["annot_rna"]] <- annot_rna
cre_list[["h3k4me1"]]<- SetIdent(cre_list[["h3k4me1"]], value = "annot_k4")
cre_list[["h3k27me3"]]<- SetIdent(cre_list[["h3k27me3"]], value = "annot_k27")
tmp_vector3 <- seurat@meta.data[["SCT_snn_res.0.2"]]
annot_rna <- ifelse(tmp_vector3 == 1, "luminal_rna", "basal_rna")
seurat@meta.data[["annot_rna"]] <- annot_rna
seurat <- SetIdent(seurat, value = "annot_rna")
saveRDS(cre_list, file.path(outputDir_objects, "seurat_cre_list_by_mark_processed_step3.rds"))
saveRDS(seurat, file.path(outputDir_objects, "seurat_cre_processed_step3.rds"))
Reload objects - step3
cre_list <- readRDS(file.path(outputDir_objects, "seurat_cre_list_by_mark_processed_step3.rds"))
seurat <- readRDS(file.path(outputDir_objects, "seurat_cre_processed_step3.rds"))
cre_umap_rna <- DimPlot(object = seurat, label = TRUE, cols = c(mypal[11], mypal[10]), group.by = "annot_rna") + labs(title = "RNA clusters")
cre_umap_rna_facs <- DimPlot(object = seurat, label = TRUE, cols = c(mypal[4], mypal[5]), group.by = "phenotype") +
labs(title = "RNA clusters, FACS annotation")
cre_umap_k4 <- DimPlot(object = cre_list[["h3k4me1"]], label = TRUE, cols = c(mypal[11], mypal[10]), group.by = "annot_k4") + labs(title = "H3K4me1 clusters")
cre_umap_k4_facs <- DimPlot(object = cre_list[["h3k4me1"]], label = TRUE, cols = c(mypal[4], mypal[5]), group.by = "phenotype") +
labs(title = "H3K4me1 clusters, FACS annotation")
cre_umap_k27 <- DimPlot(object = cre_list[["h3k27me3"]], label = TRUE, cols = c(mypal[11], mypal[10]), group.by = "annot_k27") + labs(title = "H3K27me3 clusters")
cre_umap_k27_facs <- DimPlot(object = cre_list[["h3k27me3"]], label = TRUE, cols = c(mypal[4], mypal[5]), group.by = "phenotype") +
labs(title = "H3K27me3 clusters, FACS annotation")
cre_umap_rna
cre_umap_rna_facs
cre_umap_k4
cre_umap_k4_facs
cre_umap_k27
cre_umap_k27_facs
ggsave(file.path(outputDir_plots, "cre_umap_rna.pdf"),
plot = cre_umap_rna,
device = "pdf",
units = "mm",
width = 120,
height = 100)
ggsave(file.path(outputDir_plots, "cre_umap_rna_facs.pdf"),
plot = cre_umap_rna_facs,
device = "pdf",
units = "mm",
width = 120,
height = 100)
ggsave(file.path(outputDir_plots, "cre_umap_k4.pdf"),
plot = cre_umap_k4,
device = "pdf",
units = "mm",
width = 100,
height = 100)
ggsave(file.path(outputDir_plots, "cre_umap_k4_facs.pdf"),
plot = cre_umap_k4_facs,
device = "pdf",
units = "mm",
width = 80,
height = 100)
ggsave(file.path(outputDir_plots, "cre_umap_k27.pdf"),
plot = cre_umap_k27,
device = "pdf",
units = "mm",
width = 100,
height = 100)
ggsave(file.path(outputDir_plots, "cre_umap_k27_facs.pdf"),
plot = cre_umap_k27_facs,
device = "pdf",
units = "mm",
width = 80,
height = 100)
# Extract UMAP coordinates from each representation
df1 <- cre_list[["h3k4me1"]]@reductions[["umap"]]@cell.embeddings %>% as.data.frame()
df2 <- seurat@reductions[["umap"]]@cell.embeddings %>% as.data.frame()
df3 <- cre_list[["h3k27me3"]]@reductions[["umap"]]@cell.embeddings %>% as.data.frame()
# Extract information about cell type or cluster
df1$cluster <- as.vector(cre_list[["h3k4me1"]]@meta.data[["annot_k4"]])
df2$cluster <- as.vector(seurat@meta.data[["annot_rna"]])
df3$cluster <- as.vector(cre_list[["h3k27me3"]]@meta.data[["annot_k27"]])
# Add the info about the mark
df1$mark <- "H3K4me1"
df2$mark <- "RNA"
df3$mark <- "H3K27me3"
# Shift umap_1 for each dataset
df1$umap_1 <- df1$umap_1 - 15 # Shift H3K4me1 left
df3$umap_1 <- df3$umap_1 + 20 # Shift H3K27me3 right
# Create a list and bind dataframes into one
dat.umap.lst <- list(df1, df2, df3)
dat.umap.long <- dat.umap.lst %>% bind_rows()
dat.umap.long$cell <- c(rownames(df1), rownames(df2), rownames(df3))
# Define color palette for clusters
cbPalette.ctype <- rep(c(mypal[10], mypal[11]), 3)
names(cbPalette.ctype) <- unique(dat.umap.long$cluster)
connected_umap_cre <- ggplot(dat.umap.long, aes(x = umap_1, y = umap_2, color = cluster, group = cell)) +
geom_point(alpha = 1) +
geom_path(alpha = 0.1) +
scale_color_manual(values = cbPalette.ctype) +
theme_classic() + ggtitle("H3K4me1 -> RNA -> H3K27me3")
connected_umap_cre
ggsave(file.path(outputDir_plots, "connected_umap_cre_larger.pdf"),
plot = connected_umap_cre,
device = "pdf",
units = "mm",
width = 260,
height = 100)
ggsave(file.path(outputDir_plots, "connected_umap_cre_smaller.pdf"),
plot = connected_umap_cre,
device = "pdf",
units = "mm",
width = 220,
height = 80)
rna_markers <- FindMarkers(seurat, assay = "SCT", ident.1 = "luminal_rna")
DotPlot(seurat,
features = rownames(rna_markers)[1:20],
cols = c("#EDE9E7", "#7e6148")) +
coord_flip() + labs(title = "Luminal vs basal markers, RNA")
write.csv(rna_markers, file.path(outputDir_tables, "cre_rna_cluster_markers.csv"))
DefaultAssay(cre_list[["h3k4me1"]]) <- 'bin_50k'
DefaultAssay(cre_list[["h3k27me3"]]) <- 'bin_50k'
da_bins_k4 <- FindMarkers(
object = cre_list[["h3k4me1"]],
ident.1 = "luminal_like_k4",
ident.2 = "basal_like_k4",
test.use = 'wilcox',
min.pct = 0.1
)
da_bins_k4$bin <- rownames(da_bins_k4)
da_bins_k4$query_region <- rownames(da_bins_k4)
da_bins_k27 <- FindMarkers(
object = cre_list[["h3k27me3"]],
ident.1 = "luminal_like_k27",
ident.2 = "basal_like_k27",
test.use = 'wilcox',
min.pct = 0.1
)
da_bins_k27$bin <- rownames(da_bins_k27)
da_bins_k27$query_region <- rownames(da_bins_k27)
DefaultAssay(cre_list[["h3k4me1"]]) <- 'peaks'
DefaultAssay(cre_list[["h3k27me3"]]) <- 'peaks'
da_peaks_k4 <- FindMarkers(
object = cre_list[["h3k4me1"]],
ident.1 = "luminal_like_k4",
ident.2 = "basal_like_k4",
test.use = 'wilcox',
min.pct = 0.1
)
da_peaks_k4$query_region <- rownames(da_peaks_k4) #to do inner_join with the closest feature
colnames(da_peaks_k4) <- c("p_val", "avg_log2FC", "pct.luminal_like", "pct.basal_like", "p_val_adj", "query_region")
da_peaks_k27 <- FindMarkers(
object = cre_list[["h3k27me3"]],
ident.1 = "luminal_like_k27",
ident.2 = "basal_like_k27",
test.use = 'wilcox',
min.pct = 0.1
)
da_peaks_k27$query_region <- rownames(da_peaks_k27)
colnames(da_peaks_k27) <- c("p_val", "avg_log2FC", "pct.luminal_like", "pct.basal_like", "p_val_adj", "query_region")
Finding closest features.
features_da_k27 <- ClosestFeature(cre_list[["h3k27me3"]], regions = rownames(da_peaks_k27), annotation = annotations_mm10)
features_da_k4 <- ClosestFeature(cre_list[["h3k4me1"]], regions = rownames(da_peaks_k4), annotation = annotations_mm10)
da_peaks_k27 <- inner_join(da_peaks_k27, features_da_k27, by = "query_region")
da_peaks_k4 <- inner_join(da_peaks_k4, features_da_k4, by = "query_region")
features_da_k27_bins <- ClosestFeature(cre_list[["h3k27me3"]], regions = rownames(da_bins_k27), annotation = annotations_mm10)
features_da_k4_bins <- ClosestFeature(cre_list[["h3k4me1"]], regions = rownames(da_bins_k4), annotation = annotations_mm10)
da_bins_k27 <- inner_join(da_bins_k27, features_da_k27_bins, by = "query_region")
da_bins_k4 <- inner_join(da_bins_k4, features_da_k4_bins, by = "query_region")
write.csv(da_peaks_k27, file.path(outputDir_tables, "cre_diff_peaks_with_closest_genes_k27.csv"))
write.csv(da_peaks_k4, file.path(outputDir_tables, "cre_diff_peaks_with_closest_genes_k4.csv"))
write.csv(da_bins_k27, file.path(outputDir_tables, "cre_diff_bins_with_closest_genes_k27.csv"))
write.csv(da_bins_k4, file.path(outputDir_tables, "cre_diff_bins_with_closest_genes_k4.csv"))
intersect(row.names(rna_markers[rna_markers$p_val_adj < 0.05, ]),
da_bins_k27[da_bins_k27$p_val < 0.05, ]$gene_name)
#Top 3: "Cxcl14" "Muc4" "Fst"
intersect(row.names(rna_markers[rna_markers$p_val_adj < 0.05, ]),
da_bins_k4[da_bins_k4$p_val < 0.05, ]$gene_name)
#Top 3: "Apoe" "Cxcl14" "Sparc"
roi = c("Cxcl14", "Muc4", "Fst", "Apoe","Sparc")
VlnPlot(seurat, roi, assay = "SCT", group.by = "annot_rna", cols = c(mypal[11], mypal[10]))
DefaultAssay(cre_list[["h3k4me1"]]) <- "bin_50k"
DefaultAssay(cre_list[["h3k27me3"]]) <- "bin_50k"
cre_list[["h3k4me1"]] <- SetIdent(cre_list[["h3k4me1"]], value = cre_list[["h3k4me1"]]@meta.data[["annot_k4"]])
cre_list[["h3k27me3"]] <- SetIdent(cre_list[["h3k27me3"]], value = cre_list[["h3k27me3"]]@meta.data[["annot_k27"]])
Sub-setting the max cell for single-cell tracks.
# take one well-covered luminal and basal cell that are non-ambiguously annotated by all annotations
max_cell_k27 <- subset(cre_list[["h3k27me3"]], cells = c("L539C172", "L539C186"))
max_cell_k4 <- subset(cre_list[["h3k4me1"]], cells = c("L539C302", "L539C290"))
Marker of basal cells by RNA. Different in H3K27me3 in basal-like and luminal-like cells by H3K27me3
#roi="Fst"
#chr13:114,404,115-114,509,113
roi <- GRanges(seqnames = Rle(c("chr13"), c(1)),
ranges = IRanges(114440000, end = 114500000, names = "Fst"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k27 <- CoveragePlot(
object = cre_list[["h3k27me3"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 800) +
ggtitle("H3K27me3 pseudobulk") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
p_fst_k27 <- CombineTracks(plotlist = list(cov_plot_k27,gene_plot),
heights = c(15,8)) &
theme(axis.title.y = element_text(size = 7))
p_fst_expression <- VlnPlot(seurat, "Fst", assay = "SCT", group.by = "annot_rna", cols = c(mypal[11], mypal[10]))
p_fst_k27
p_fst_expression
Marker of basal cells by RNA. Different in H3K4me1 in basal-like and luminal-like cells by H3K4me1
#roi="Apoe"
#chr7:19,684,744-19,704,653
roi <- GRanges(seqnames = Rle(c("chr7"), c(1)),
ranges = IRanges(19689094, end = 19702731, names = "Apoe"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k4 <- CoveragePlot(
object = cre_list[["h3k4me1"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 800) +
ggtitle("H3K4me1 pseudobulk") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
p_apoe_k4 <- CombineTracks(plotlist = list(cov_plot_k4,gene_plot),
heights = c(15,8)) &
theme(axis.title.y = element_text(size = 7))
p_apoe_expression <- VlnPlot(seurat, "Apoe", assay = "SCT", group.by = "annot_rna", cols = c(mypal[11], mypal[10]))
p_apoe_k4
p_apoe_expression
ggsave(file.path(outputDir_plots, "fst_tracks_by_epigenetic_annotation.pdf"),
plot = p_fst_k27,
device = "pdf",
units = "mm",
width = 200,
height = 100)
ggsave(file.path(outputDir_plots, "apoe_tracks_by_epigenetic_annotation.pdf"),
plot = p_apoe_k4,
device = "pdf",
units = "mm",
width = 200,
height = 100)
cre_list[["h3k4me1"]] <- SetIdent(cre_list[["h3k4me1"]], value = cre_list[["h3k4me1"]]@meta.data[["annot_rna"]])
cre_list[["h3k27me3"]] <- SetIdent(cre_list[["h3k27me3"]], value = cre_list[["h3k27me3"]]@meta.data[["annot_rna"]])
max_cell_k27 <- SetIdent(max_cell_k27, value = max_cell_k27@meta.data$annot_rna)
max_cell_k4 <- SetIdent(max_cell_k4, value = max_cell_k4@meta.data$annot_rna)
ggsave(file.path(outputDir_plots, "Plin2_tracks_by_rna.pdf"),
plot = p_Plin2,
device = "pdf",
units = "mm",
width = 200,
height = 240)
ggsave(file.path(outputDir_plots, "Plin2_tiles_by_rna.pdf"),
plot = tiles_Plin2,
device = "pdf",
units = "mm",
width = 200,
height = 150)
#Keratine locus
#chr11:100,117,140-100,448,915
roi <- GRanges(seqnames = Rle(c("chr11"), c(1)),
ranges = IRanges(start = 100117140, end = 100339961, names = "Krt"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k27 <- CoveragePlot(
object = cre_list[["h3k27me3"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 4000) +
ggtitle("H3K27me3 pseudobulk") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k27_max <- CoveragePlot(
object = max_cell_k27,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 4000) +
ggtitle("H3K27me3 single cell") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k4 <- CoveragePlot(
object = cre_list[["h3k4me1"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 4000) +
ggtitle("H3K4me1 pseudobulk") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_k4_max <- CoveragePlot(
object = max_cell_k4,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 4000) +
ggtitle("H3K4me1 single cell") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_rna <- BigwigTrack(
bigwig = list_rna_bw,
region = roi,
smooth = 1000,
type = "coverage",
y_label = "Normalised signal") +
scale_fill_manual(values = rep(mypal[1], 5)) +
theme(legend.position = "none") +
ggtitle("RNA pseudobulk")
p_krt <- CombineTracks(plotlist = list(cov_plot_rna,
cov_plot_k27_max, cov_plot_k27,
cov_plot_k4_max, cov_plot_k4, gene_plot),
heights = c(40,15,15,15,15,8)) &
theme(axis.title.y = element_text(size = 7))
tile_plot_k27 <- TilePlot(
object = cre_list[["h3k27me3"]],
region = roi,
tile.cells = 70,
tile.size = 2000) + scale_fill_gradient(low = "white", high = "#00806c")
tile_plot_k4 <- TilePlot(
object = cre_list[["h3k4me1"]],
region = roi,
tile.cells = 70,
tile.size = 2000) + scale_fill_gradient(low = "white", high = "#1c6779")
tiles_krt <- CombineTracks(plotlist = list(tile_plot_k27, tile_plot_k4),
heights = c(15,15)) & theme(axis.title.y = element_text(size = 7))
p_krt
tiles_krt
ggsave(file.path(outputDir_plots, "krt_tracks_by_rna.pdf"),
plot = p_krt,
device = "pdf",
units = "mm",
width = 200,
height = 240)
ggsave(file.path(outputDir_plots, "krt_tiles_by_rna.pdf"),
plot = tiles_krt,
device = "pdf",
units = "mm",
width = 200,
height = 150)
#roi="Krt19"
#chr11:100,138,496-100,148,799
roi <- GRanges(seqnames = Rle(c("chr11"), c(1)),
ranges = IRanges(100138496, end = 100148799, names = "Krt19"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k27 <- CoveragePlot(
object = cre_list[["h3k27me3"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K27me3 pseudobulk") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k27_max <- CoveragePlot(
object = max_cell_k27,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K27me3 single cell") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k4 <- CoveragePlot(
object = cre_list[["h3k4me1"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K4me1 pseudobulk") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_k4_max <- CoveragePlot(
object = max_cell_k4,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K4me1 single cell") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_rna <- BigwigTrack(
bigwig = list_rna_bw,
region = roi,
smooth = 200,
type = "coverage",
y_label = "Normalised signal") +
scale_fill_manual(values = rep(mypal[1], 5)) +
theme(legend.position = "none") +
ggtitle("RNA pseudobulk")
p_krt19 <- CombineTracks(plotlist = list(cov_plot_rna,
cov_plot_k27_max, cov_plot_k27,
cov_plot_k4_max, cov_plot_k4, gene_plot),
heights = c(30,15,15,15,15,8)) &
theme(axis.title.y = element_text(size = 7))
tile_plot_k27 <- TilePlot(
object = cre_list[["h3k27me3"]],
region = roi,
tile.cells = 70,
tile.size = 400) + scale_fill_gradient(low = "white", high = "#00806c")
tile_plot_k4 <- TilePlot(
object = cre_list[["h3k4me1"]],
region = roi,
tile.cells = 70,
tile.size = 400) + scale_fill_gradient(low = "white", high = "#1c6779")
tiles_krt19 <- CombineTracks(plotlist = list(tile_plot_k27, tile_plot_k4),
heights = c(15,15)) & theme(axis.title.y = element_text(size = 7))
p_krt19
tiles_krt19
ggsave(file.path(outputDir_plots, "krt19_tracks_by_rna.pdf"),
plot = p_krt19,
device = "pdf",
units = "mm",
width = 200,
height = 240)
ggsave(file.path(outputDir_plots, "krt19_tiles_by_rna.pdf"),
plot = tiles_krt19,
device = "pdf",
units = "mm",
width = 200,
height = 150)
#roi="Krt14"
#chr11:100,198,634-100,211,215
roi <- GRanges(seqnames = Rle(c("chr11"), c(1)),
ranges = IRanges(100198634, end = 100211215, names = "Krt14"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k27 <- CoveragePlot(
object = cre_list[["h3k27me3"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K27me3 pseudobulk") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k27_max <- CoveragePlot(
object = max_cell_k27,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K27me3 single cell") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k4 <- CoveragePlot(
object = cre_list[["h3k4me1"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K4me1 pseudobulk") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_k4_max <- CoveragePlot(
object = max_cell_k4,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K4me1 single cell") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_rna <- BigwigTrack(
bigwig = list_rna_bw,
region = roi,
smooth = 200,
type = "coverage",
y_label = "Normalised signal") +
scale_fill_manual(values = rep(mypal[1], 5)) +
theme(legend.position = "none") +
ggtitle("RNA pseudobulk")
p_krt14 <- CombineTracks(plotlist = list(cov_plot_rna,
cov_plot_k27_max, cov_plot_k27,
cov_plot_k4_max, cov_plot_k4, gene_plot),
heights = c(30,15,15,15,15,8)) &
theme(axis.title.y = element_text(size = 7))
tile_plot_k27 <- TilePlot(
object = cre_list[["h3k27me3"]],
region = roi,
tile.cells = 70,
tile.size = 200) + scale_fill_gradient(low = "white", high = "#00806c")
tile_plot_k4 <- TilePlot(
object = cre_list[["h3k4me1"]],
region = roi,
tile.cells = 70,
tile.size = 200) + scale_fill_gradient(low = "white", high = "#1c6779")
tiles_krt14 <- CombineTracks(plotlist = list(tile_plot_k27, tile_plot_k4),
heights = c(15,15)) & theme(axis.title.y = element_text(size = 7))
tiles_krt14
p_krt14
ggsave(file.path(outputDir_plots, "krt14_tracks_by_rna.pdf"),
plot = p_krt14,
device = "pdf",
units = "mm",
width = 200,
height = 240)
ggsave(file.path(outputDir_plots, "krt14_tiles_by_rna.pdf"),
plot = tiles_krt14,
device = "pdf",
units = "mm",
width = 200,
height = 150)
#roi="Krt17"
#chr11:100,252,878-100,265,917
roi <- GRanges(seqnames = Rle(c("chr11"), c(1)),
ranges = IRanges(100252878, end = 100265917, names = "Krt17"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k27 <- CoveragePlot(
object = cre_list[["h3k27me3"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K27me3 pseudobulk") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k27_max <- CoveragePlot(
object = max_cell_k27,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K27me3 single cell") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k4 <- CoveragePlot(
object = cre_list[["h3k4me1"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K4me1 pseudobulk") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_k4_max <- CoveragePlot(
object = max_cell_k4,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 400) +
ggtitle("H3K4me1 single cell") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_rna <- BigwigTrack(
bigwig = list_rna_bw,
region = roi,
smooth = 200,
type = "coverage",
y_label = "Normalised signal") +
scale_fill_manual(values = rep(mypal[1], 5)) +
theme(legend.position = "none") +
ggtitle("RNA pseudobulk")
p_krt17 <- CombineTracks(plotlist = list(cov_plot_rna,
cov_plot_k27_max, cov_plot_k27,
cov_plot_k4_max, cov_plot_k4, gene_plot),
heights = c(30,15,15,15,15,8)) &
theme(axis.title.y = element_text(size = 7))
tile_plot_k27 <- TilePlot(
object = cre_list[["h3k27me3"]],
region = roi,
tile.cells = 70,
tile.size = 200) + scale_fill_gradient(low = "white", high = "#00806c")
tile_plot_k4 <- TilePlot(
object = cre_list[["h3k4me1"]],
region = roi,
tile.cells = 70,
tile.size = 200) + scale_fill_gradient(low = "white", high = "#1c6779")
tiles_krt17 <- CombineTracks(plotlist = list(tile_plot_k27, tile_plot_k4),
heights = c(15,15)) & theme(axis.title.y = element_text(size = 7))
p_krt17
tiles_krt17
ggsave(file.path(outputDir_plots, "krt17_tracks_by_rna.pdf"),
plot = p_krt17,
device = "pdf",
units = "mm",
width = 200,
height = 240)
ggsave(file.path(outputDir_plots, "krt17_tiles_by_rna.pdf"),
plot = tiles_krt17,
device = "pdf",
units = "mm",
width = 200,
height = 150)
roi <- GRanges(seqnames = Rle(c("chr2"), c(1)),
ranges = IRanges(33005895, end = 34356499, names = "roi"))
gene_plot <- AnnotationPlot(
object = cre_list[["h3k4me1"]],
region = roi)
cov_plot_k27 <- CoveragePlot(
object = cre_list[["h3k27me3"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 10000) +
ggtitle("H3K27me3 pseudobulk") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k27_max <- CoveragePlot(
object = max_cell_k27,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 10000) +
ggtitle("H3K27me3 single cell") +
scale_fill_manual(values = c(mypal[3], mypal[3], mypal[3]))
cov_plot_k4 <- CoveragePlot(
object = cre_list[["h3k4me1"]],
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 10000) +
ggtitle("H3K4me1 pseudobulk") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_k4_max <- CoveragePlot(
object = max_cell_k4,
region = roi,
annotation = FALSE,
peaks = FALSE,
window = 10000) +
ggtitle("H3K4me1 single cell") +
scale_fill_manual(values = c(mypal[2], mypal[2], mypal[2]))
cov_plot_rna <- BigwigTrack(
bigwig = list_rna_bw,
region = roi,
smooth = 10000,
type = "coverage",
y_label = "Normalised signal") +
scale_fill_manual(values = rep(mypal[1], 5)) +
theme(legend.position = "none") +
ggtitle("RNA pseudobulk")
p <- CombineTracks(plotlist = list(cov_plot_rna,
cov_plot_k27_max, cov_plot_k27,
cov_plot_k4_max, cov_plot_k4, gene_plot),
heights = c(30,15,15,15,15,8)) &
theme(axis.title.y = element_text(size = 7))
tile_plot_k27 <- TilePlot(
object = cre_list[["h3k27me3"]],
region = roi,
tile.cells = 70,
tile.size = 5000) + scale_fill_gradient(low = "white", high = "#00806c")
tile_plot_k4 <- TilePlot(
object = cre_list[["h3k4me1"]],
region = roi,
tile.cells = 70,
tile.size = 5000) + scale_fill_gradient(low = "white", high = "#1c6779")
tiles <- CombineTracks(plotlist = list(tile_plot_k27, tile_plot_k4),
heights = c(15,15)) & theme(axis.title.y = element_text(size = 7))
tiles
p
ggsave(file.path(outputDir_plots, "region1_tracks_window10000_by_rna.pdf"),
plot = p,
device = "pdf",
units = "mm",
width = 200,
height = 240)
ggsave(file.path(outputDir_plots, "region1_tiles_window10000_by_rna.pdf"),
plot = tiles,
device = "pdf",
units = "mm",
width = 200,
height = 150)
roi_krt = c("Krt15", "Krt19", "Krt14", "Krt16", "Krt17", "Eif1")
p_genes_rna_krt <- VlnPlot(seurat, roi_krt, assay = "SCT", group.by = "annot_rna", cols = c(mypal[11], mypal[10]))
roi_region1 = c("Garnl3", "Ralgps1", "Zbtb34", "Lmx1b", "Mvb12b", "Gm13403", "C230014O12Rik", "Angptl2", "Zbtb43", "Gm13530", "9430024E24Rik", "Gm13404", "Pbx3")
p_genes_rna_region1 <- VlnPlot(seurat, roi_region1, assay = "SCT", group.by = "annot_rna", cols = c(mypal[11], mypal[10]))
p_genes_rna_krt
p_genes_rna_region1
ggsave(file.path(outputDir_plots, "genes_rna_region1.pdf"),
plot = p_genes_rna_region1,
device = "pdf",
units = "mm",
width = 250,
height = 250)
ggsave(file.path(outputDir_plots, "genes_rna_krt.pdf"),
plot = p_genes_rna_krt,
device = "pdf",
units = "mm",
width = 200,
height = 200)
gene_activity_k4 <- GeneActivity(cre_list[["h3k4me1"]],
assay = "bin_50k",
extend.upstream = 3000,
extend.downstream = 0,
process_n = 3000)
cre_list[["h3k4me1"]][['gene_activity']] <- CreateAssayObject(counts = gene_activity_k4)
gene_activity_k27 <- GeneActivity(cre_list[["h3k27me3"]],
assay = "bin_50k",
extend.upstream = 3000,
extend.downstream = 0,
process_n = 3000)
cre_list[["h3k27me3"]][['gene_activity']] <- CreateAssayObject(counts = gene_activity_k27)
DefaultAssay(cre_list[["h3k27me3"]]) <- 'gene_activity'
cre_list[["h3k27me3"]] <- SetIdent(cre_list[["h3k27me3"]], value = "annot_rna")
DefaultAssay(cre_list[["h3k4me1"]]) <- 'gene_activity'
cre_list[["h3k4me1"]] <- SetIdent(cre_list[["h3k4me1"]], value = "annot_rna")
da_activity_k27 <- FindMarkers(
object = cre_list[["h3k27me3"]],
ident.1 = "luminal_rna",
ident.2 = "basal_rna",
test.use = 'wilcox',
min.pct = 0.1
)
da_activity_k27 <- da_activity_k27[da_activity_k27$p_val < 0.05, ]
da_activity_k27$gene <- row.names(da_activity_k27)
colnames(da_activity_k27) <- c("pval_k27", "log2FC_k27_luminal_vs_basal", "pct.1", "pct.2", "padj_k27", "gene")
da_activity_k4 <- FindMarkers(
object = cre_list[["h3k4me1"]],
ident.1 = "luminal_rna",
ident.2 = "basal_rna",
test.use = 'wilcox',
min.pct = 0.1
)
da_activity_k4 <- da_activity_k4[da_activity_k4$p_val < 0.05, ]
da_activity_k4$gene <- row.names(da_activity_k4)
colnames(da_activity_k4) <- c("pval_k4", "log2FC_k4_luminal_vs_basal", "pct.1", "pct.2", "padj_k4", "gene")
meta_k4 <- data.frame(annot = cre_list[["h3k4me1"]]@meta.data[["annot_rna"]],
cb = cre_list[["h3k4me1"]]@meta.data[["CB"]])
meta_k27 <- data.frame(annot = cre_list[["h3k27me3"]]@meta.data[["annot_rna"]],
cb = cre_list[["h3k27me3"]]@meta.data[["CB"]])
luminal_matrix_k4 <- gene_activity_k4[, colnames(gene_activity_k4) %in% meta_k4$cb[meta_k4$annot == "luminal_rna"], drop = FALSE]
basal_matrix_k4 <- gene_activity_k4[, colnames(gene_activity_k4) %in% meta_k4$cb[meta_k4$annot == "basal_rna"], drop = FALSE]
luminal_matrix_k27 <- gene_activity_k27[, colnames(gene_activity_k27) %in% meta_k27$cb[meta_k27$annot == "luminal_rna"], drop = FALSE]
basal_matrix_k27 <- gene_activity_k27[, colnames(gene_activity_k27) %in% meta_k27$cb[meta_k27$annot == "basal_rna"], drop = FALSE]
df <- data.frame(
gene = rownames(gene_activity_k4),
basal_signal_k27 = rowSums(basal_matrix_k27),
basal_signal_k4 = rowSums(basal_matrix_k4),
luminal_signal_k27 = rowSums(luminal_matrix_k27),
luminal_signal_k4 = rowSums(luminal_matrix_k4),
pct_basal_k27 = rowSums(basal_matrix_k27 != 0) / ncol(basal_matrix_k27) * 100,
pct_luminal_k27 = rowSums(luminal_matrix_k27 != 0) / ncol(luminal_matrix_k27) * 100,
pct_basal_k4 = rowSums(basal_matrix_k4 != 0) / ncol(basal_matrix_k4) * 100,
pct_luminal_k4 = rowSums(luminal_matrix_k4 != 0) / ncol(luminal_matrix_k4) * 100
)
# filtering of low covered regions
df <- df[df$luminal_signal_k4 + df$basal_signal_k4 + df$luminal_signal_k27 + df$basal_signal_k27 > 25, ]
#as we have more basal than luminal cells, will normalize the signal, so that each cell has equal input
norm_factor <- length(meta_k4$cb[meta_k4$annot == "basal_rna"]) / length(meta_k4$cb[meta_k4$annot == "luminal_rna"])
df$basal_signal_k27_norm <- df$basal_signal_k27 / norm_factor
df$basal_signal_k4_norm <- df$basal_signal_k4 / norm_factor
df <- merge(df, da_activity_k27[, c("gene", "log2FC_k27_luminal_vs_basal")], by = "gene", all.x = TRUE)
df <- merge(df, da_activity_k4[, c("gene", "log2FC_k4_luminal_vs_basal")], by = "gene", all.x = TRUE)
# set the LF of all non-differential genes as 0
df <- df %>% mutate_all(~replace(., is.na(.), 0))
### For k4 only
df_k4 <- data.frame(
gene = rownames(gene_activity_k4),
basal_signal_k27 = rowSums(basal_matrix_k27),
basal_signal_k4 = rowSums(basal_matrix_k4),
luminal_signal_k27 = rowSums(luminal_matrix_k27),
luminal_signal_k4 = rowSums(luminal_matrix_k4),
pct_basal_k4 = rowSums(basal_matrix_k4 != 0) / ncol(basal_matrix_k4) * 100,
pct_luminal_k4 = rowSums(luminal_matrix_k4 != 0) / ncol(luminal_matrix_k4) * 100,
pct_basal_k27 = rowSums(basal_matrix_k27 != 0) / ncol(basal_matrix_k27) * 100,
pct_luminal_k27 = rowSums(luminal_matrix_k27 != 0) / ncol(luminal_matrix_k27) * 100)
df_k4 <- df_k4[df_k4$luminal_signal_k4 + df_k4$basal_signal_k4 > 10, ]
df_k4$basal_signal_k4_norm <- df_k4$basal_signal_k4 / norm_factor
df_k4 <- merge(df_k4, da_activity_k27[, c("gene", "log2FC_k27_luminal_vs_basal")], by = "gene", all.x = TRUE)
df_k4 <- merge(df_k4, da_activity_k4[, c("gene", "log2FC_k4_luminal_vs_basal")], by = "gene", all.x = TRUE)
df_k4 <- df_k4 %>% mutate_all(~replace(., is.na(.), 0))
### For k27 only
df_k27 <- data.frame(
gene = rownames(gene_activity_k4),
basal_signal_k27 = rowSums(basal_matrix_k27),
basal_signal_k4 = rowSums(basal_matrix_k4),
luminal_signal_k27 = rowSums(luminal_matrix_k27),
luminal_signal_k4 = rowSums(luminal_matrix_k4),
pct_basal_k27 = rowSums(basal_matrix_k27 != 0) / ncol(basal_matrix_k27) * 100,
pct_luminal_k27 = rowSums(luminal_matrix_k27 != 0) / ncol(luminal_matrix_k27) * 100,
pct_basal_k4 = rowSums(basal_matrix_k4 != 0) / ncol(basal_matrix_k4) * 100,
pct_luminal_k4 = rowSums(luminal_matrix_k4 != 0) / ncol(luminal_matrix_k4) * 100)
df_k27 <- df_k27[df_k27$luminal_signal_k27 + df_k27$basal_signal_k27 > 10, ]
df_k27$basal_signal_k27_norm <- df_k27$basal_signal_k27 / norm_factor
df_k27 <- merge(df_k27, da_activity_k27[, c("gene", "log2FC_k27_luminal_vs_basal")], by = "gene", all.x = TRUE)
df_k27 <- merge(df_k27, da_activity_k4[, c("gene", "log2FC_k4_luminal_vs_basal")], by = "gene", all.x = TRUE)
df_k27 <- df_k27 %>% mutate_all(~replace(., is.na(.), 0))
### Function to calculate entropy based on the transposed gene activity matrix
calculate_entropy <- function(gene_expression) {
# Normalize expression to get probabilities
total_expression <- sum(gene_expression)
if (total_expression == 0) {
return(0) # If no expression, entropy is 0
}
probabilities <- gene_expression / total_expression
entropy <- -sum(probabilities * log2(probabilities + 1e-10)) # Adding small value to avoid log(0)
return(entropy)
}
intercellular_entropy_k4 <- as.data.frame(apply(as.data.frame(t(gene_activity_k4)), 2, calculate_entropy)) #2 - columns
intercellular_entropy_k27 <- as.data.frame(apply(as.data.frame(t(gene_activity_k27)), 2, calculate_entropy))
colnames(intercellular_entropy_k4) <- "entropy_k4"
colnames(intercellular_entropy_k27) <- "entropy_k27"
intercellular_entropy_k27$gene <- row.names(intercellular_entropy_k27)
intercellular_entropy_k4$gene <- row.names(intercellular_entropy_k4)
df_k4 <- left_join(df_k4, intercellular_entropy_k4, by = "gene")
df_k4 <- left_join(df_k4, intercellular_entropy_k27, by = "gene")
df_k27 <- left_join(df_k27, intercellular_entropy_k27, by = "gene")
df_k27 <- left_join(df_k27, intercellular_entropy_k4, by = "gene")
## Separately for basal and luminal
intercellular_entropy_k27_luminal <- as.data.frame(apply(as.data.frame(t(luminal_matrix_k27)), 2, calculate_entropy))
colnames(intercellular_entropy_k27_luminal) <- "entropy_k27_luminal"
intercellular_entropy_k27_luminal$gene <- row.names(intercellular_entropy_k27_luminal)
df_k27 <- left_join(df_k27, intercellular_entropy_k27_luminal, by = "gene")
intercellular_entropy_k27_basal <- as.data.frame(apply(as.data.frame(t(basal_matrix_k27)), 2, calculate_entropy))
colnames(intercellular_entropy_k27_basal) <- "entropy_k27_basal"
intercellular_entropy_k27_basal$gene <- row.names(intercellular_entropy_k27_basal)
df_k27 <- left_join(df_k27, intercellular_entropy_k27_basal, by = "gene")
#luminal_matrix_k27
#basal_matrix_k27
All together
set.seed(123)
umap_gene_activity_lf_pval0.05 <- umap(df[, c("luminal_signal_k27", "luminal_signal_k4",
"basal_signal_k27_norm", "basal_signal_k4_norm",
"log2FC_k27_luminal_vs_basal", "log2FC_k4_luminal_vs_basal")],
n_neighbors = 200,
min_dist = 0.1)
saveRDS(umap_gene_activity_lf_pval0.05, file.path(outputDir_objects, "umap_gene_activity_filt25_lf_pval00.5.rds"))
Separate by mark
set.seed(123)
umap_gene_activity_lf_pval0.05_k4 <- umap(df_k4[, c("luminal_signal_k4", "basal_signal_k4_norm",
"log2FC_k4_luminal_vs_basal")],
n_neighbors = 200,
min_dist = 0.1)
umap_gene_activity_lf_pval0.05_k27 <- umap(df_k27[, c("luminal_signal_k27", "basal_signal_k27_norm",
"log2FC_k27_luminal_vs_basal")],
n_neighbors = 200,
min_dist = 0.1)
saveRDS(umap_gene_activity_lf_pval0.05_k27, file.path(outputDir_objects, "umap_gene_activity_filt10_lf_pval00.5_k27.rds"))
saveRDS(umap_gene_activity_lf_pval0.05_k4, file.path(outputDir_objects, "umap_gene_activity_filt10_lf_pval00.5_k4.rds"))
log1p_trans <- trans_new("log1p", transform = function(x) log(x + 1), inverse = function(x) exp(x) - 1)
umap_gene_activity_lf_pval0.05 <- readRDS(file.path(outputDir_objects, "umap_gene_activity_filt25_lf_pval00.5.rds"))
umap_embeddings <- as.data.frame(umap_gene_activity_lf_pval0.05$layout)
colnames(umap_embeddings) <- c("UMAP_1", "UMAP_2")
umap_embeddings$gene <- df$gene
umap_embeddings$signal_k4 <- df$basal_signal_k4 + df$luminal_signal_k4
umap_embeddings$signal_k27 <- df$basal_signal_k27 + df$luminal_signal_k27
umap_embeddings$signal_k4_basal <- df$basal_signal_k4
umap_embeddings$signal_k27_basal <- df$basal_signal_k27
umap_embeddings$signal_k4_luminal <- df$luminal_signal_k4
umap_embeddings$signal_k27_luminal <- df$luminal_signal_k27
umap_embeddings$log2FC_k27_luminal_vs_basal <- df$log2FC_k27_luminal_vs_basal
umap_embeddings$log2FC_k4_luminal_vs_basal <- df$log2FC_k4_luminal_vs_basal
umap_embeddings$signal_k4_basal_norm <- df$basal_signal_k4_norm
umap_embeddings$signal_k27_basal_norm <- df$basal_signal_k27_norm
p1 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = signal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
p2 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = signal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
p3 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = signal_k27_basal_norm, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by basal H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "basal H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
p4 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = signal_k27_luminal, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by luminal H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "luminal H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
p5 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = signal_k4_basal_norm, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by basal H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "basal H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
p6 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = signal_k4_luminal, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by luminal H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "luminal H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
p7 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = log2FC_k27_luminal_vs_basal, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "log2FC H3K27me3 signal") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient2(
low = mypal[4],
mid = "lightgrey",
high = mypal[1],
midpoint = 0,
limits = c(-4, 4), # Full legend range
oob = scales::squish, # Caps the colors beyond the limits
breaks = c(-4, -2, 0, 2, 4), # Legend breaks
labels = c( "<-4", "-2", "0", "2", ">4") # Legend labels
)
p8 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = log2FC_k4_luminal_vs_basal, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "log2FC H3K4me1 signal") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient2(
low = mypal[4],
mid = "lightgrey",
high = mypal[1],
midpoint = 0,
limits = c(-4, 4), # Full legend range
oob = scales::squish, # Caps the colors beyond the limits
breaks = c(-4, -2, 0, 2, 4), # Legend breaks
labels = c( "<-4", "-2", "0", "2", ">4") # Legend labels
)
umap_embeddings <- merge(umap_embeddings, da_activity_k27[, c("gene", "pval_k27", "padj_k27")], by = "gene", all.x = TRUE)
umap_embeddings <- merge(umap_embeddings, da_activity_k4[, c("gene", "pval_k4", "padj_k4")], by = "gene", all.x = TRUE)
p9 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = pval_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by pval of H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "pval") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
p10 <- ggplot(umap_embeddings, aes(x = UMAP_1, y = UMAP_2, color = pval_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by padj of H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "pval") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
p1
p2
p7
p8
umap_gene_activity_lf_pval0.05_k27 <- readRDS(file.path(outputDir_objects, "umap_gene_activity_filt10_lf_pval00.5_k27.rds"))
umap_gene_activity_lf_pval0.05_k4 <- readRDS(file.path(outputDir_objects, "umap_gene_activity_filt10_lf_pval00.5_k4.rds"))
log1p_trans <- trans_new("log1p", transform = function(x) log(x + 1), inverse = function(x) exp(x) - 1)
umap_embeddings_k27 <- as.data.frame(umap_gene_activity_lf_pval0.05_k27$layout)
colnames(umap_embeddings_k27) <- c("UMAP_1", "UMAP_2")
umap_embeddings_k27$gene <- df_k27$gene
umap_embeddings_k27 <- left_join(umap_embeddings_k27, df_k27, by = "gene")
umap_embeddings_k27$signal_k27 <- umap_embeddings_k27$basal_signal_k27 + umap_embeddings_k27$luminal_signal_k27
umap_embeddings_k27$signal_k4 <- umap_embeddings_k27$basal_signal_k4 + umap_embeddings_k27$luminal_signal_k4
umap_embeddings_k27$basal_signal_k4_norm <- umap_embeddings_k27$basal_signal_k4 / norm_factor
umap_embeddings_k27 <- merge(umap_embeddings_k27, da_activity_k27[, c("gene", "pval_k27", "padj_k27")], by = "gene", all.x = TRUE)
umap_embeddings_k27 <- merge(umap_embeddings_k27, da_activity_k4[, c("gene", "pval_k4", "padj_k4")], by = "gene", all.x = TRUE)
umap_embeddings_k27$pct_basal_k27 <- df_k27$pct_basal_k27
umap_embeddings_k27$pct_luminal_k27 <- df_k27$pct_luminal_k27
umap_embeddings_k27$entropy_k27 <- df_k27$entropy_k27
umap_embeddings_k27$entropy_k27_norm <- df_k27$entropy_k27_norm
umap_embeddings_k27$entropy_k4 <- df_k27$entropy_k4
umap_embeddings_k27$entropy_k4_norm <- df_k27$entropy_k4_norm
umap_embeddings_k27$entropy_k27_luminal <- df_k27$entropy_k27_luminal
umap_embeddings_k27$entropy_k27_basal <- df_k27$entropy_k27_basal
umap_embeddings_k27$entropy_k27_norm_binned <- df_k27$entropy_k27_norm_binned
umap_embeddings_k27$entropy_k4_norm_binned <- df_k27$entropy_k4_norm_binned
umap_embeddings_k27$entropy_k27_norm_binned <- df_k27$entropy_k27_basal_binned
umap_embeddings_k27$entropy_k4_norm_binned <- df_k27$entropy_k4_luminal_binned
umap_embeddings_k4 <- as.data.frame(umap_gene_activity_lf_pval0.05_k4$layout)
colnames(umap_embeddings_k4) <- c("UMAP_1", "UMAP_2")
umap_embeddings_k4$gene <- df_k4$gene
umap_embeddings_k4 <- left_join(umap_embeddings_k4, df_k4, by = "gene")
umap_embeddings_k4$signal_k27 <- umap_embeddings_k4$basal_signal_k27 + umap_embeddings_k4$luminal_signal_k27
umap_embeddings_k4$signal_k4 <- umap_embeddings_k4$basal_signal_k4 + umap_embeddings_k4$luminal_signal_k4
umap_embeddings_k4$basal_signal_k27_norm <- umap_embeddings_k4$basal_signal_k27 / norm_factor
umap_embeddings_k4 <- merge(umap_embeddings_k4, da_activity_k27[, c("gene", "pval_k27", "padj_k27")], by = "gene", all.x = TRUE)
umap_embeddings_k4 <- merge(umap_embeddings_k4, da_activity_k4[, c("gene", "pval_k4", "padj_k4")], by = "gene", all.x = TRUE)
umap_embeddings_k4$pct_basal_k4 <- df_k4$pct_basal_k4
umap_embeddings_k4$pct_luminal_k4 <- df_k4$pct_luminal_k4
umap_embeddings_k4$entropy_k27 <- df_k4$entropy_k27
umap_embeddings_k4$entropy_k27_norm <- df_k4$entropy_k27_norm
umap_embeddings_k4$entropy_k4 <- df_k4$entropy_k4
umap_embeddings_k4$entropy_k4_norm <- df_k4$entropy_k4_norm
### first look
ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = signal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
### exclude the outlier dots with low coverage
umap_embeddings_k27 <- umap_embeddings_k27 %>%
dplyr::filter(UMAP_1 >= -15 & UMAP_1 <= 5, UMAP_2 >= -10 & UMAP_2 <= 10)
umap_k27_by_total_k27 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = signal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k27_by_total_k4 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = signal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k27_by_basal_k27 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = basal_signal_k27_norm, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by basal H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "basal H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k27_by_luminal_k27 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = luminal_signal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by luminal H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "luminal H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k27_by_basal_k4 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = basal_signal_k4_norm, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by basal H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "basal H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k27_by_luminal_k4 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = luminal_signal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by luminal H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "luminal H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k27_by_pct_basal <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = pct_basal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by % of basal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of basal cells") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#89cac0", "#00A087E5", "#13695b", "#00483c", "#00201b", "#001612"),
breaks = c(0, 25, 50, 75, 100),
limits = c(0, 100))
umap_k27_by_pct_luminal <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = pct_luminal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by % of luminal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of luminal cells") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#89cac0", "#00A087E5", "#13695b", "#00483c", "#00201b", "#001612"),
breaks = c(0, 25, 50, 75, 100),
limits = c(0, 100))
##### need to plot colored over gray
gray_points <- umap_embeddings_k27
colored_points <- umap_embeddings_k27[umap_embeddings_k27$pct_basal_k4 != 0, ]
umap_k27_by_pct_basal_k4 <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = pct_basal_k4),
size = 1) +
ggtitle("Genes colored by % of basal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of basal cells") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradientn(colors = c("#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#104d57", "#00201b"))
gray_points <- umap_embeddings_k27
colored_points <- umap_embeddings_k27[umap_embeddings_k27$pct_luminal_k4 != 0, ]
umap_k27_by_pct_luminal_k4 <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = pct_luminal_k4),
size = 1) +
ggtitle("Genes colored by % of luminal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of luminal cells") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradientn(colors = c("#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#104d57", "#00201b"))
#####
umap_k27_by_pct_all <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = pct_basal_k27 + pct_luminal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by % of basal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of basal cells") +
theme(legend.position = "right") +
theme_classic() +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"))
umap_k27_by_pct_basal
umap_k27_by_pct_luminal
ggsave(file.path(outputDir_plots, "umap_k27_by_pct_basal_green.pdf"),
plot = umap_k27_by_pct_basal,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_pct_luminal_green.pdf"),
plot = umap_k27_by_pct_luminal,
device = "pdf",
units = "mm",
width = 133,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_pct_basal_k4_blue.pdf"),
plot = umap_k27_by_pct_basal_k4,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_pct_liminal_k4_blue.pdf"),
plot = umap_k27_by_pct_luminal_k4,
device = "pdf",
units = "mm",
width = 133,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_total_k27.pdf"),
plot = umap_k27_by_total_k27,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_total_k4.pdf"),
plot = umap_k27_by_total_k4,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_basal_k27.pdf"),
plot = umap_k27_by_basal_k27,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_luminal_k27.pdf"),
plot = umap_k27_by_luminal_k27,
device = "pdf",
units = "mm",
width = 133,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_basal_k4.pdf"),
plot = umap_k27_by_basal_k4,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_luminal_k4.pdf"),
plot = umap_k27_by_luminal_k4,
device = "pdf",
units = "mm",
width = 133,
height = 100)
### LF
## plotting non-zero points over the zero ones for better visualisation
gray_points <- umap_embeddings_k27
colored_points <- umap_embeddings_k27[umap_embeddings_k27$log2FC_k27_luminal_vs_basal != 0, ]
umap_k27_by_k27_fc <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = log2FC_k27_luminal_vs_basal),
size = 1) +
ggtitle("Genes colored by H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "log2FC H3K27me3 signal") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradient2(
low = mypal[4],
mid = "lightgrey",
high = mypal[1],
midpoint = 0,
limits = c(-4, 4),
oob = scales::squish,
breaks = c(-4, -2, 0, 2, 4),
labels = c("<-4", "-2", "0", "2", ">4")
)
gray_points <- umap_embeddings_k27
colored_points <- umap_embeddings_k27[umap_embeddings_k27$log2FC_k4_luminal_vs_basal != 0, ]
umap_k27_by_k4_fc <-ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = log2FC_k4_luminal_vs_basal),
size = 1) +
ggtitle("Genes colored by H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "log2FC H3K4me1 signal") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradient2(
low = mypal[4],
mid = "lightgrey",
high = mypal[1],
midpoint = 0,
limits = c(-4, 4),
oob = scales::squish,
breaks = c(-4, -2, 0, 2, 4),
labels = c("<-4", "-2", "0", "2", ">4")
)
### p-values
umap_k27_pval_k27 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = pval_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by pval of H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "pval") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k27_pval_k4 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = pval_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by pval of H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "pval") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k27_padj_k27 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = padj_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by padj of H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "padj") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k4_padj_k4 <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = padj_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by padj of H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "padj") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k27_by_k27_fc
ggsave(file.path(outputDir_plots, "umap_k27_by_k4_fc.pdf"),
plot = umap_k27_by_k4_fc,
device = "pdf",
units = "mm",
width = 145,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_k27_fc.pdf"),
plot = umap_k27_by_k27_fc,
device = "pdf",
units = "mm",
width = 145,
height = 100)
umap_k27_by_k27_entropy <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = entropy_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by intercellular entropy of H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "Shannon's entropy") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#ede6ed", "#e6dae6", "#dbbfdb", "#c491c4", "#7A527A", "#4C334C", "#261b26"))
umap_k27_by_k4_entropy <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = entropy_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by intercellular entropy of H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "Shannon's entropy") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#ede6ed", "#e6dae6", "#dbbfdb", "#c491c4", "#7A527A", "#4C334C", "#261b26"))
umap_k27_by_k27_entropy_luminal <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = entropy_k27_luminal, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by intercellular entropy of H3K27me3 signal, luminal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "Shannon's entropy") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#ede6ed", "#e6dae6", "#dbbfdb", "#c491c4", "#7A527A", "#4C334C", "#261b26"))
umap_k27_by_k27_entropy_basal <- ggplot(umap_embeddings_k27, aes(x = UMAP_1, y = UMAP_2, color = entropy_k27_basal, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by intercellular entropy of H3K27me3 signal, basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "Shannon's entropy") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#ede6ed", "#e6dae6", "#dbbfdb", "#c491c4", "#7A527A", "#4C334C", "#261b26"))
umap_k27_by_k27_entropy
umap_k27_by_k4_entropy
ggsave(file.path(outputDir_plots, "umap_k27_by_k4_entropy.pdf"),
plot = umap_k27_by_k4_entropy,
device = "pdf",
units = "mm",
width = 134,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k27_by_k27_entropy.pdf"),
plot = umap_k27_by_k27_entropy,
device = "pdf",
units = "mm",
width = 134,
height = 100)
ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = signal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
### exclude the outlier dots with low coverage
umap_embeddings_k4 <- umap_embeddings_k4 %>%
dplyr::filter(UMAP_1 >= -14 & UMAP_1 <= 5, UMAP_2 >= -5 & UMAP_2 <= 7)
umap_k4_by_total_k4 <-ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = signal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k4_by_total_k27 <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = signal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by total H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "total H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k4_by_basal_k4 <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = basal_signal_k4_norm, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by basal H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "basal H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k4_by_luminal_k4 <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = luminal_signal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by luminal H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "luminal H3K4me1") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k4_by_basal_k27 <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = basal_signal_k27_norm, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by basal H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "basal H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k4_by_luminal_k27 <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = luminal_signal_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by luminal H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "luminal H3K27me3") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#D3D3D3", "#BECFCC", "#00A087E5", "#005043", "#00201b"),
trans = log1p_trans,
breaks = c(0, 10, 20, 50, 100, 200, 400, 600))
umap_k4_by_pct_basal <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = pct_basal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by % of basal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of basal cells") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#104d57", "#00201b", "#001612"),
breaks = c(0, 25, 50, 75, 100),
limits = c(0, 100))
umap_k4_by_pct_luminal <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = pct_luminal_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by % of luminal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of luminal cells") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#D3D3D3", "#A3C5D0", "#4DBBD5E5", "#006D7F", "#104d57", "#00201b", "#001612"),
breaks = c(0, 25, 50, 75, 100),
limits = c(0, 100))
gray_points <- umap_embeddings_k4
colored_points <- umap_embeddings_k4[umap_embeddings_k4$pct_basal_k27 != 0, ]
umap_k4_by_pct_basal_k27 <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = pct_basal_k4),
size = 1) +
ggtitle("Genes colored by % of basal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of basal cells") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradientn(colors = c("#D3D3D3", "#89cac0", "#00A087E5", "#13695b", "#00483c", "#00201b"))
gray_points <- umap_embeddings_k4
colored_points <- umap_embeddings_k4[umap_embeddings_k4$pct_luminal_k27 != 0, ]
umap_k4_by_pct_luminal_k27 <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = pct_luminal_k4),
size = 1) +
ggtitle("Genes colored by % of luminal cells with signal in the gene") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "% of luminal cells") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradientn(colors = c("#D3D3D3", "#89cac0", "#00A087E5", "#13695b", "#00483c", "#00201b"))
umap_k4_by_pct_basal
umap_k4_by_pct_luminal
ggsave(file.path(outputDir_plots, "umap_k4_by_pct_basal_k27_green.pdf"),
plot = umap_k4_by_pct_basal_k27,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_pct_liminal_k27_green.pdf"),
plot = umap_k4_by_pct_luminal_k27,
device = "pdf",
units = "mm",
width = 133,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_pct_basal_blue.pdf"),
plot = umap_k4_by_pct_basal,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_pct_liminal_blue.pdf"),
plot = umap_k4_by_pct_luminal,
device = "pdf",
units = "mm",
width = 133,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_total_k27.pdf"),
plot = umap_k4_by_total_k27,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_total_k4.pdf"),
plot = umap_k4_by_total_k4,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_basal_k27.pdf"),
plot = umap_k4_by_basal_k27,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_luminal_k27.pdf"),
plot = umap_k4_by_luminal_k27,
device = "pdf",
units = "mm",
width = 133,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_basal_k4.pdf"),
plot = umap_k4_by_basal_k4,
device = "pdf",
units = "mm",
width = 130,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_luminal_k4.pdf"),
plot = umap_k4_by_luminal_k4,
device = "pdf",
units = "mm",
width = 133,
height = 100)
### LF
## plotting non-zero points over the zero ones for better visualisation
gray_points <- umap_embeddings_k4
colored_points <- umap_embeddings_k4[umap_embeddings_k4$log2FC_k4_luminal_vs_basal != 0, ]
umap_k4_by_k4_fc <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = log2FC_k4_luminal_vs_basal),
size = 1) +
ggtitle("Genes colored by H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "log2FC H3K4me1 signal") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradient2(
low = mypal[4],
mid = "lightgrey",
high = mypal[1],
midpoint = 0,
limits = c(-4, 4),
oob = scales::squish,
breaks = c(-4, -2, 0, 2, 4),
labels = c("<-4", "-2", "0", "2", ">4")
)
gray_points <- umap_embeddings_k4
colored_points <- umap_embeddings_k4[umap_embeddings_k4$log2FC_k27_luminal_vs_basal != 0, ]
umap_k4_by_k27_fc <- ggplot() +
geom_point(data = gray_points,
aes(x = UMAP_1, y = UMAP_2),
color = "lightgrey", size = 1) +
geom_point(data = colored_points,
aes(x = UMAP_1, y = UMAP_2,
color = log2FC_k27_luminal_vs_basal),
size = 1) +
ggtitle("Genes colored by H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "log2FC H3K27me3 signal") +
theme_classic() +
theme(legend.position = "right") +
scale_color_gradient2(
low = mypal[4],
mid = "lightgrey",
high = mypal[1],
midpoint = 0,
limits = c(-4, 4),
oob = scales::squish,
breaks = c(-4, -2, 0, 2, 4),
labels = c("<-4", "-2", "0", "2", ">4")
)
### p-values
umap_k4_by_k27_pval <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = pval_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by pval of H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "pval") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k4_by_k4_pval <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = pval_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by pval of H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "pval") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k4_by_k27_padj <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = padj_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by padj of H3K27me3 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "padj") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k4_by_k4_padj <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = padj_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by padj of H3K4me1 signal change in luminal vs basal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "padj") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradient(low = "red", high = "blue")
umap_k4_by_k4_fc
ggsave(file.path(outputDir_plots, "umap_k4_by_k4_fc.pdf"),
plot = umap_k4_by_k4_fc,
device = "pdf",
units = "mm",
width = 145,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_k27_fc.pdf"),
plot = umap_k4_by_k27_fc,
device = "pdf",
units = "mm",
width = 145,
height = 100)
umap_k4_by_k4_entropy <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = entropy_k4, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by intercellular entropy of H3K4me1 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "Shannon's entropy") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#ede6ed", "#e6dae6", "#dbbfdb", "#c491c4", "#7A527A", "#4C334C", "#261b26"))
umap_k4_by_k27_entropy <- ggplot(umap_embeddings_k4, aes(x = UMAP_1, y = UMAP_2, color = entropy_k27, label = gene)) +
geom_point(size = 1) +
theme_minimal() +
ggtitle("Genes colored by intercellular entropy of H3K27me3 signal") +
xlab("UMAP Dimension 1") +
ylab("UMAP Dimension 2") +
labs(color = "Shannon's entropy") +
theme(legend.position = "right") +
theme_classic() +
scale_color_gradientn(colors = c("#ede6ed", "#e6dae6", "#dbbfdb", "#c491c4", "#7A527A", "#4C334C", "#261b26"))
umap_k4_by_k4_entropy
umap_k4_by_k27_entropy
ggsave(file.path(outputDir_plots, "umap_k4_by_k4_entropy.pdf"),
plot = umap_k4_by_k4_entropy,
device = "pdf",
units = "mm",
width = 134,
height = 100)
ggsave(file.path(outputDir_plots, "umap_k4_by_k27_entropy.pdf"),
plot = umap_k4_by_k27_entropy,
device = "pdf",
units = "mm",
width = 134,
height = 100)